Consumption Model For A Week

Assumption: consumption = func(individual components, future weather, future humidity)

Get Data

result = query_api.query_data_frame(org=org, query=query)
result.columns
Index(['result', 'table', '_start', '_stop', '_time', '_value', '_field',
       '_measurement', 'device', 'sensor', 'units'],
      dtype='object')
result.shape
(4182, 11)

# Save the raw query result as a CSV (plain string: the original used curly
# “smart quotes”, which are a SyntaxError, and an f-prefix with no placeholders).
result.to_csv('./Data/energy_info.csv', index=False)

result.drop(['result', 'table', '_field', '_start', '_stop', '_measurement', 'units', 'device'], axis=1, inplace=True)
result.head()
_time _value sensor
0 2023-03-02 00:00:00+00:00 12.888000 Aircon
1 2023-03-03 00:00:00+00:00 121.541007 Aircon
2 2023-03-04 00:00:00+00:00 222.993806 Aircon
3 2023-03-05 00:00:00+00:00 118.435757 Aircon
4 2023-03-06 00:00:00+00:00 238.721389 Aircon
result.columns
Index(['_time', '_value', 'sensor'], dtype='object')

Energy in Wide Form

wide_df = result.pivot(index='_time', columns='sensor', values='_value')
# Reset the index to make '_time' a regular column
wide_df.reset_index(inplace=True)

wide_df.columns
Index(['_time', 'Aircon', 'Consumption', 'ConsumptionNet', 'Export', 'Fridge',
       'Garage', 'Hotwater', 'HouseBoardOutside', 'HouseBoardOutsideR',
       'HouseBoardR', 'Import', 'Lights', 'Net', 'OvenStove', 'Powerpoints',
       'Production', 'Unmonitored'],
      dtype='object', name='sensor')
wide_df['_time'] = pd.to_datetime(wide_df['_time']).dt.tz_localize(None)
wide_df.drop(['Unmonitored'], axis = 1, inplace=True)
wide_df.rename(columns={'_time':'ds'}, inplace=True)
# Delete the last row
wide_df = wide_df.drop(wide_df.index[-1])
wide_df_new = pd.DataFrame()
for column in wide_df.columns:
    wide_df_new[f'{column}'] = wide_df[f'{column}'].interpolate()

wide_df = pd.DataFrame(wide_df_new)
wide_df.columns
Index(['ds', 'Aircon', 'Consumption', 'ConsumptionNet', 'Export', 'Fridge',
       'Garage', 'Hotwater', 'HouseBoardOutside', 'HouseBoardOutsideR',
       'HouseBoardR', 'Import', 'Lights', 'Net', 'OvenStove', 'Powerpoints',
       'Production'],
      dtype='object')
wide_df.tail()
ds Aircon Consumption ConsumptionNet Export Fridge Garage Hotwater HouseBoardOutside HouseBoardOutsideR HouseBoardR Import Lights Net OvenStove Powerpoints Production
240 2023-10-28 8.578005 357.275503 866.654129 180.242247 25.492343 24.017087 0.530865 999.775552 333.258545 366.408449 71.876417 80.009421 -108.365830 66.086281 176.392548 491.748285
241 2023-10-29 8.661682 179.501779 866.654129 806.610093 19.634587 2.731102 0.192283 530.312474 176.770805 201.397413 1.974071 60.108551 -804.636021 1.253300 103.451046 1017.375874
242 2023-10-30 8.603354 263.812291 866.654129 757.601472 27.081000 0.983716 0.183639 788.486015 262.828480 290.616882 3.378339 66.827993 -754.223132 1.356785 178.170521 1052.272015
243 2023-10-31 9.032917 311.552394 866.654129 803.841970 35.649472 11.163953 0.269313 901.165305 300.388379 319.795146 5.027126 67.972347 -798.814844 30.217549 166.649785 1142.661342
244 2023-11-01 8.743938 211.339011 866.654129 898.594991 30.389375 3.655479 0.189410 623.050504 207.683462 245.486118 2.309883 50.624125 -896.285107 0.213375 145.941312 1134.978184
def data_scaler(df):
    """Min-max scale every numeric column of *df* into [0, 1].

    Non-numeric columns (e.g. the 'ds' timestamp) pass through unchanged.
    Works on a copy, so the caller's frame is not mutated (every call site
    in this notebook reassigns the return value, so this is safe).

    This reproduces sklearn's ``MinMaxScaler`` with the default feature
    range without the sklearn dependency: ``(x - min) / (max - min)``, and
    constant columns map to 0.0 instead of dividing by zero.

    Parameters
    ----------
    df : pandas.DataFrame

    Returns
    -------
    pandas.DataFrame
        A new frame with numeric columns scaled to [0, 1].
    """
    df = df.copy()
    numeric_columns = df.select_dtypes(include=['number']).columns

    col_min = df[numeric_columns].min()
    # Replace a zero range with 1 so constant columns become 0.0,
    # matching MinMaxScaler's handling of degenerate columns.
    col_range = (df[numeric_columns].max() - col_min).replace(0, 1)

    df[numeric_columns] = (df[numeric_columns] - col_min) / col_range
    return df
def long_form(df_plot):
    """Melt a wide frame into long format: one (ds, unique_id, y) row per
    original (row, value-column) pair, keyed on the 'ds' column."""
    melted = df_plot.melt(id_vars='ds', var_name='unique_id', value_name='y')
    return melted

def altair_plot(df_plot):
    """Interactive line chart of a long-form frame (ds, unique_id, y).

    Hovering near a line, or clicking its legend entry, selects that
    series and thickens its line.
    """
    import altair as alt

    # Selection driven by the legend and by nearest-point mouseover.
    highlight = alt.selection_point(on='mouseover', fields=['unique_id'], bind='legend', nearest=True)

    base = alt.Chart(df_plot).encode(
        x='ds:T',
        y='y:Q',
        color='unique_id:N',
    )

    # Invisible circles exist only to capture the hover events.
    hover_layer = (
        base.mark_circle()
        .encode(opacity=alt.value(0))
        .add_params(highlight)
        .properties(width=1000)
    )

    # Selected series render with a thicker stroke.
    line_layer = base.mark_line().encode(
        size=alt.condition(~highlight, alt.value(1), alt.value(3))
    )

    return hover_layer + line_layer
wide_df = data_scaler(wide_df)
wide_df.tail()
ds Aircon Consumption ConsumptionNet Export Fridge Garage Hotwater HouseBoardOutside HouseBoardOutsideR HouseBoardR Import Lights Net OvenStove Powerpoints Production
240 2023-10-28 0.000327 0.295413 0.639383 0.087269 0.054518 0.475787 0.000988 0.441317 0.270647 0.263152 0.098826 0.397064 0.702175 0.217335 0.451213 0.164710
241 2023-10-29 0.000459 0.096052 0.639383 0.390543 0.008991 0.049113 0.000167 0.155476 0.095349 0.096613 0.001842 0.287229 0.452268 0.003559 0.195780 0.385757
242 2023-10-30 0.000367 0.190600 0.639383 0.366814 0.066865 0.014087 0.000146 0.312670 0.191751 0.186659 0.003790 0.324314 0.470363 0.003900 0.457440 0.400433
243 2023-10-31 0.001045 0.244137 0.639383 0.389203 0.133461 0.218148 0.000354 0.381277 0.233826 0.216107 0.006078 0.330630 0.454358 0.099064 0.417095 0.438445
244 2023-11-01 0.000589 0.131755 0.639383 0.435080 0.092579 0.067642 0.000160 0.211942 0.129978 0.141110 0.002308 0.234883 0.419373 0.000130 0.344577 0.435214
df = long_form(wide_df)
wide_df.rename(columns={'Consumption': 'y'}, inplace=True)
wide_df['unique_id'] = 'Consumption'
df.head()
ds unique_id y
0 2023-03-02 Aircon 0.007134
1 2023-03-03 Aircon 0.178748
2 2023-03-04 Aircon 0.338990
3 2023-03-05 Aircon 0.173844
4 2023-03-06 Aircon 0.363831

Weather data

import pandas as pd
path = 'Data'
name = 'weather'

weather_df = pd.read_csv(f'{path}/{name}.csv')
weather_df.head()
ds temp_c is_day wind_kph wind_degree pressure_mb precip_mm humidity cloud feelslike_c windchill_c heatindex_c dewpoint_c chance_of_rain vis_km gust_kph uv
0 2023-01-01 00:00 21.6 0 12.6 126 1013.0 0.0 84 73 21.6 21.6 24.4 18.7 0 10.0 19.8 1.0
1 2023-01-01 01:00 21.3 0 11.9 133 1013.0 0.0 85 77 21.3 21.3 24.3 18.7 0 10.0 18.7 1.0
2 2023-01-01 02:00 21.1 0 11.2 140 1012.0 0.0 86 80 21.1 21.1 24.2 18.7 0 10.0 17.6 1.0
3 2023-01-01 03:00 20.8 0 10.4 147 1012.0 0.0 88 84 20.8 20.8 20.8 18.7 0 10.0 16.6 1.0
4 2023-01-01 04:00 21.0 0 10.6 144 1012.0 0.0 86 84 21.0 21.0 21.0 18.6 0 10.0 16.3 1.0
weather_df = data_scaler(weather_df)
def data_day_avg(weather_df):
    """Collapse hourly weather rows into one mean row per calendar day.

    Replaces the original day-by-day loop, which re-parsed 'ds' for every
    date and grew the result with ``pd.concat`` inside the loop (quadratic
    in the number of days), with a single linear ``groupby`` pass.

    Parameters
    ----------
    weather_df : pandas.DataFrame
        Must contain a parseable 'ds' timestamp column; all other columns
        are expected to be numeric (as they are after ``data_scaler``).

    Returns
    -------
    pandas.DataFrame
        One row per day, 'ds' holding ``datetime.date`` values, remaining
        columns holding the daily means.
    """
    # The day key keeps the name 'ds', so reset_index restores the column.
    day = pd.to_datetime(weather_df['ds']).dt.date
    # sort=False preserves first-appearance order, matching the original
    # iteration over dates.unique().
    avg_df = (
        weather_df.drop(columns='ds')
        .groupby(day, sort=False)
        .mean()
        .reset_index()
    )
    return avg_df
weather_avg_df = data_day_avg(weather_df)
weather_avg_df['ds'] = pd.to_datetime(weather_avg_df['ds'])
weather_avg_df
ds temp_c is_day wind_kph wind_degree pressure_mb precip_mm humidity cloud feelslike_c windchill_c heatindex_c dewpoint_c chance_of_rain vis_km gust_kph uv
0 2023-01-01 0.566503 0.583333 0.459098 0.345636 0.428728 0.000895 0.670886 0.583750 0.516791 0.595085 0.512881 0.778121 0.0 1.000000 0.434678 0.422619
1 2023-01-02 0.554902 0.583333 0.451453 0.396936 0.359649 0.001253 0.602848 0.409167 0.499005 0.584249 0.486067 0.725707 0.0 1.000000 0.434844 0.428571
2 2023-01-03 0.582516 0.583333 0.263889 0.415506 0.258772 0.000000 0.584916 0.287917 0.525746 0.610043 0.502366 0.738663 0.0 1.000000 0.283533 0.452381
3 2023-01-04 0.650490 0.583333 0.313073 0.382660 0.163377 0.000895 0.687764 0.196250 0.608582 0.673535 0.609621 0.866902 0.0 0.994792 0.305777 0.500000
4 2023-01-05 0.654575 0.583333 0.257518 0.506267 0.166667 0.026859 0.735232 0.700417 0.629726 0.677350 0.608701 0.911808 0.0 0.989583 0.288098 0.416667
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
339 2023-12-06 0.580556 0.583333 0.254077 0.264740 0.558114 0.000000 0.608650 0.174583 0.519527 0.608364 0.515116 0.745878 0.0 1.000000 0.214558 0.470238
340 2023-12-07 0.605065 0.583333 0.266565 0.202066 0.565789 0.000000 0.582806 0.050000 0.543532 0.631258 0.533386 0.748233 0.0 1.000000 0.222693 0.476190
341 2023-12-08 0.618954 0.583333 0.349261 0.251625 0.575658 0.000018 0.626582 0.342083 0.561816 0.644078 0.555205 0.795200 0.0 1.000000 0.285940 0.476190
342 2023-12-09 0.612582 0.583333 0.362768 0.190460 0.544956 0.000107 0.577532 0.497500 0.564179 0.638584 0.539432 0.757509 0.0 1.000000 0.308267 0.428571
343 2023-12-10 0.630556 0.583333 0.513507 0.233751 0.530702 0.000233 0.515295 0.537500 0.571269 0.655067 0.546924 0.730271 0.0 1.000000 0.426627 0.452381

344 rows × 17 columns

(
    wide_df[['y']].plot(title='Consumption')
)
<Axes: title={'center': 'Consumption'}>

from statsforecast import StatsForecast
/home/ben/mambaforge/envs/cfast/lib/python3.11/site-packages/statsforecast/core.py:25: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)
  from tqdm.autonotebook import tqdm
from pylab import rcParams
rcParams['figure.figsize'] = (15,4)
StatsForecast.plot(df, engine='plotly')
/home/ben/mambaforge/envs/cfast/lib/python3.11/site-packages/_plotly_utils/basevalidators.py:105: FutureWarning:

The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result
wide_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 245 entries, 0 to 244
Data columns (total 18 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   ds                  245 non-null    datetime64[ns]
 1   Aircon              245 non-null    float64       
 2   y                   245 non-null    float64       
 3   ConsumptionNet      198 non-null    float64       
 4   Export              245 non-null    float64       
 5   Fridge              245 non-null    float64       
 6   Garage              245 non-null    float64       
 7   Hotwater            245 non-null    float64       
 8   HouseBoardOutside   161 non-null    float64       
 9   HouseBoardOutsideR  245 non-null    float64       
 10  HouseBoardR         245 non-null    float64       
 11  Import              245 non-null    float64       
 12  Lights              245 non-null    float64       
 13  Net                 245 non-null    float64       
 14  OvenStove           245 non-null    float64       
 15  Powerpoints         245 non-null    float64       
 16  Production          245 non-null    float64       
 17  unique_id           245 non-null    object        
dtypes: datetime64[ns](1), float64(16), object(1)
memory usage: 34.6+ KB
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3920 entries, 0 to 3919
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   ds         3920 non-null   datetime64[ns]
 1   unique_id  3920 non-null   object        
 2   y          3789 non-null   float64       
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 92.0+ KB
weather_avg_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 344 entries, 0 to 343
Data columns (total 17 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   ds              344 non-null    datetime64[ns]
 1   temp_c          344 non-null    float64       
 2   is_day          344 non-null    float64       
 3   wind_kph        344 non-null    float64       
 4   wind_degree     344 non-null    float64       
 5   pressure_mb     344 non-null    float64       
 6   precip_mm       344 non-null    float64       
 7   humidity        344 non-null    float64       
 8   cloud           344 non-null    float64       
 9   feelslike_c     344 non-null    float64       
 10  windchill_c     344 non-null    float64       
 11  heatindex_c     344 non-null    float64       
 12  dewpoint_c      344 non-null    float64       
 13  chance_of_rain  344 non-null    float64       
 14  vis_km          344 non-null    float64       
 15  gust_kph        344 non-null    float64       
 16  uv              344 non-null    float64       
dtypes: datetime64[ns](1), float64(16)
memory usage: 45.8 KB
date_range_start, date_range_end  = wide_df['ds'].min(), wide_df['ds'].max()
# Filter rows between the start and end dates
weather_avg_filtered_df = weather_avg_df[(weather_avg_df['ds'] >= date_range_start) & (weather_avg_df['ds'] <= date_range_end)]
weather_avg_filtered_df
ds temp_c is_day wind_kph wind_degree pressure_mb precip_mm humidity cloud feelslike_c windchill_c heatindex_c dewpoint_c chance_of_rain vis_km gust_kph uv
60 2023-03-02 0.644444 0.541667 0.294980 0.153784 0.344298 0.001056 0.675633 0.345417 0.609826 0.667888 0.594374 0.845112 0.0 1.0 0.250332 0.470238
61 2023-03-03 0.603268 0.541667 0.483563 0.386142 0.403509 0.002131 0.682489 0.579167 0.572388 0.629426 0.548107 0.821555 0.0 1.0 0.432022 0.380952
62 2023-03-04 0.561765 0.541667 0.574669 0.433612 0.425439 0.000161 0.604430 0.364583 0.509204 0.590659 0.491982 0.734982 0.0 1.0 0.485309 0.422619
63 2023-03-05 0.598039 0.541667 0.372324 0.350627 0.427632 0.000358 0.603903 0.401250 0.542537 0.624542 0.531677 0.761042 0.0 1.0 0.319804 0.428571
64 2023-03-06 0.629575 0.541667 0.313073 0.290854 0.417763 0.000358 0.608650 0.157500 0.568284 0.653999 0.557965 0.785925 0.0 1.0 0.263363 0.476190
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
300 2023-10-28 0.379575 0.541667 0.700051 0.446611 0.708333 0.000233 0.500000 0.389167 0.351741 0.414835 0.327681 0.497939 0.0 1.0 0.588811 0.345238
301 2023-10-29 0.382516 0.541667 0.383028 0.320682 0.656798 0.000125 0.524789 0.433750 0.347637 0.423230 0.329390 0.525324 0.0 1.0 0.328934 0.345238
302 2023-10-30 0.441340 0.583333 0.396152 0.435237 0.562500 0.000000 0.583861 0.181250 0.403483 0.478327 0.373686 0.611307 0.0 1.0 0.340139 0.416667
303 2023-10-31 0.550163 0.583333 0.489424 0.549559 0.442982 0.000000 0.570675 0.025417 0.492537 0.579976 0.467140 0.687132 0.0 1.0 0.470286 0.464286
304 2023-11-01 0.510784 0.583333 0.561672 0.445218 0.505482 0.000340 0.599156 0.480000 0.458209 0.543040 0.435594 0.685218 0.0 1.0 0.467795 0.404762

245 rows × 17 columns

altair_plot(df)
weather_avg_df_plot = long_form(weather_avg_filtered_df)
altair_plot(weather_avg_df_plot)
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf
wide_df["y"]
0      0.355157
1      0.291076
2      0.498418
3      0.418824
4      0.561599
         ...   
240    0.295413
241    0.096052
242    0.190600
243    0.244137
244    0.131755
Name: y, Length: 245, dtype: float64
fig, axs = plt.subplots(nrows=1, ncols=2)

plot_acf(wide_df["y"],  lags=28, ax=axs[0],color="fuchsia")
axs[0].set_title("Autocorrelation");

plot_pacf(wide_df["y"],  lags=28, ax=axs[1],color="lime")
axs[1].set_title('Partial Autocorrelation')

plt.show();

fig, axs = plt.subplots(nrows=1, ncols=2)

plot_acf(wide_df["y"],  lags=14, ax=axs[0],color="fuchsia")
axs[0].set_title("Autocorrelation");

plot_pacf(wide_df["y"],  lags=14, ax=axs[1],color="lime")
axs[1].set_title('Partial Autocorrelation')

plt.show();

from statsmodels.tsa.seasonal import seasonal_decompose 
a = seasonal_decompose(wide_df["y"], model = "additive", period=28)
a.plot();

Designing Models

weather_avg_filtered_df.head()
ds temp_c is_day wind_kph wind_degree pressure_mb precip_mm humidity cloud feelslike_c windchill_c heatindex_c dewpoint_c chance_of_rain vis_km gust_kph uv
60 2023-03-02 0.644444 0.541667 0.294980 0.153784 0.344298 0.001056 0.675633 0.345417 0.609826 0.667888 0.594374 0.845112 0.0 1.0 0.250332 0.470238
61 2023-03-03 0.603268 0.541667 0.483563 0.386142 0.403509 0.002131 0.682489 0.579167 0.572388 0.629426 0.548107 0.821555 0.0 1.0 0.432022 0.380952
62 2023-03-04 0.561765 0.541667 0.574669 0.433612 0.425439 0.000161 0.604430 0.364583 0.509204 0.590659 0.491982 0.734982 0.0 1.0 0.485309 0.422619
63 2023-03-05 0.598039 0.541667 0.372324 0.350627 0.427632 0.000358 0.603903 0.401250 0.542537 0.624542 0.531677 0.761042 0.0 1.0 0.319804 0.428571
64 2023-03-06 0.629575 0.541667 0.313073 0.290854 0.417763 0.000358 0.608650 0.157500 0.568284 0.653999 0.557965 0.785925 0.0 1.0 0.263363 0.476190
wide_df = pd.merge(wide_df, weather_avg_filtered_df, on='ds', how='outer')
wide_df.head()
ds Aircon y ConsumptionNet Export Fridge Garage Hotwater HouseBoardOutside HouseBoardOutsideR ... humidity cloud feelslike_c windchill_c heatindex_c dewpoint_c chance_of_rain vis_km gust_kph uv
0 2023-03-02 0.007134 0.355157 NaN 0.016595 0.814609 0.003794 0.000287 NaN 0.356702 ... 0.675633 0.345417 0.609826 0.667888 0.594374 0.845112 0.0 1.0 0.250332 0.470238
1 2023-03-03 0.178748 0.291076 NaN 0.165011 0.669709 0.015031 0.000143 NaN 0.292065 ... 0.682489 0.579167 0.572388 0.629426 0.548107 0.821555 0.0 1.0 0.432022 0.380952
2 2023-03-04 0.338990 0.498418 NaN 0.294620 0.686509 0.059559 0.000460 NaN 0.496691 ... 0.604430 0.364583 0.509204 0.590659 0.491982 0.734982 0.0 1.0 0.485309 0.422619
3 2023-03-05 0.173844 0.418824 NaN 0.302607 0.701288 0.009916 0.000256 NaN 0.419958 ... 0.603903 0.401250 0.542537 0.624542 0.531677 0.761042 0.0 1.0 0.319804 0.428571
4 2023-03-06 0.363831 0.561599 NaN 0.381220 0.773714 0.008798 0.002112 NaN 0.562640 ... 0.608650 0.157500 0.568284 0.653999 0.557965 0.785925 0.0 1.0 0.263363 0.476190

5 rows × 34 columns

horizon = 10

train_size = len(wide_df) - horizon
train, test = wide_df[:train_size], wide_df[train_size:]


wide_df.shape, train.shape, test.shape
((245, 34), (235, 34), (10, 34))
# Future exogenous regressors for predict(): the weather rows covering the
# forecast horizon. .copy() makes an independent frame so the 'unique_id'
# assignment no longer writes into a slice of weather_avg_filtered_df
# (the cause of the SettingWithCopyWarning printed below).
futr_df = weather_avg_filtered_df[train_size:].copy()
futr_df['unique_id'] = 'Consumption'
futr_df.shape
/tmp/ipykernel_1270281/3311167277.py:2: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
(10, 18)
futr_df
ds temp_c is_day wind_kph wind_degree pressure_mb precip_mm humidity cloud feelslike_c windchill_c heatindex_c dewpoint_c chance_of_rain vis_km gust_kph uv unique_id
295 2023-10-23 0.560784 0.500000 0.579256 0.550836 0.439693 0.000000 0.614979 0.075000 0.509328 0.589896 0.485279 0.734246 0.0 1.000000 0.512699 0.392857 Consumption
296 2023-10-24 0.550490 0.541667 0.571738 0.437210 0.500000 0.000430 0.684599 0.150833 0.503483 0.580128 0.486593 0.771349 0.0 1.000000 0.495933 0.410714 Consumption
297 2023-10-25 0.558660 0.541667 0.585117 0.546657 0.482456 0.000000 0.656646 0.073750 0.511070 0.588217 0.487119 0.759570 0.0 1.000000 0.513197 0.428571 Consumption
298 2023-10-26 0.495261 0.541667 0.518349 0.551996 0.493421 0.005766 0.684072 0.593333 0.449627 0.528541 0.422581 0.725265 0.0 0.953125 0.448705 0.363095 Consumption
299 2023-10-27 0.309314 0.541667 0.621814 0.516481 0.633772 0.008792 0.767932 0.862500 0.289552 0.355311 0.249211 0.616166 0.0 0.994792 0.573373 0.232143 Consumption
300 2023-10-28 0.379575 0.541667 0.700051 0.446611 0.708333 0.000233 0.500000 0.389167 0.351741 0.414835 0.327681 0.497939 0.0 1.000000 0.588811 0.345238 Consumption
301 2023-10-29 0.382516 0.541667 0.383028 0.320682 0.656798 0.000125 0.524789 0.433750 0.347637 0.423230 0.329390 0.525324 0.0 1.000000 0.328934 0.345238 Consumption
302 2023-10-30 0.441340 0.583333 0.396152 0.435237 0.562500 0.000000 0.583861 0.181250 0.403483 0.478327 0.373686 0.611307 0.0 1.000000 0.340139 0.416667 Consumption
303 2023-10-31 0.550163 0.583333 0.489424 0.549559 0.442982 0.000000 0.570675 0.025417 0.492537 0.579976 0.467140 0.687132 0.0 1.000000 0.470286 0.464286 Consumption
304 2023-11-01 0.510784 0.583333 0.561672 0.445218 0.505482 0.000340 0.599156 0.480000 0.458209 0.543040 0.435594 0.685218 0.0 1.000000 0.467795 0.404762 Consumption
from neuralforecast import models
models.__all__
['RNN',
 'GRU',
 'LSTM',
 'TCN',
 'DeepAR',
 'DilatedRNN',
 'MLP',
 'NHITS',
 'NBEATS',
 'NBEATSx',
 'TFT',
 'VanillaTransformer',
 'Informer',
 'Autoformer',
 'PatchTST',
 'FEDformer',
 'StemGNN',
 'HINT',
 'TimesNet']
from ray import tune

from neuralforecast.core import NeuralForecast
from neuralforecast.auto import AutoMLP
from neuralforecast.models import *
model_list = [RNN,
 GRU,
 LSTM,
 TCN,
 #DeepAR,  # not good
 DilatedRNN,
 MLP,
 NHITS,
 NBEATS,
 NBEATSx,
 #TFT, # too much GPU
 #VanillaTransformer, # not good
 #Informer,
 #Autoformer,
 #PatchTST, # too much GPU
 #FEDformer, #taken too long
 #StemGNN, #need n_series
 #HINT, #need n_series
 #TimesNet # takes too long
             ]
wide_df.columns
Index(['ds', 'Aircon', 'y', 'ConsumptionNet', 'Export', 'Fridge', 'Garage',
       'Hotwater', 'HouseBoardOutside', 'HouseBoardOutsideR', 'HouseBoardR',
       'Import', 'Lights', 'Net', 'OvenStove', 'Powerpoints', 'Production',
       'unique_id', 'temp_c', 'is_day', 'wind_kph', 'wind_degree',
       'pressure_mb', 'precip_mm', 'humidity', 'cloud', 'feelslike_c',
       'windchill_c', 'heatindex_c', 'dewpoint_c', 'chance_of_rain', 'vis_km',
       'gust_kph', 'uv'],
      dtype='object')
models = [model(input_size=2 * horizon,
                h=horizon,
                max_steps=500,
                hist_exog_list =['Aircon', 'Export', 'Fridge', 'Garage',
                                 'Hotwater', 
                                 'HouseBoardR','Import', 'Lights', 'Net', 'OvenStove',
                                 'Powerpoints', 'Production'
                                ],
                futr_exog_list = ['temp_c', 'is_day', 'wind_kph', 'wind_degree',
                    'pressure_mb', 'precip_mm', 'humidity', 'cloud', 'feelslike_c',
                    'windchill_c', 'heatindex_c', 'dewpoint_c', 'chance_of_rain',
                    'vis_km', 'gust_kph', 'uv']
               ) for model in model_list]
models
Global seed set to 1
Global seed set to 1
Global seed set to 1
Global seed set to 1
Global seed set to 1
Global seed set to 1
Global seed set to 1
Global seed set to 1
Global seed set to 1
[RNN, GRU, LSTM, TCN, DilatedRNN, MLP, NHITS, NBEATS, NBEATSx]
nf = NeuralForecast(
    models=models, 
    freq='D')
train.columns
Index(['ds', 'Aircon', 'y', 'ConsumptionNet', 'Export', 'Fridge', 'Garage',
       'Hotwater', 'HouseBoardOutside', 'HouseBoardOutsideR', 'HouseBoardR',
       'Import', 'Lights', 'Net', 'OvenStove', 'Powerpoints', 'Production',
       'unique_id', 'temp_c', 'is_day', 'wind_kph', 'wind_degree',
       'pressure_mb', 'precip_mm', 'humidity', 'cloud', 'feelslike_c',
       'windchill_c', 'heatindex_c', 'dewpoint_c', 'chance_of_rain', 'vis_km',
       'gust_kph', 'uv'],
      dtype='object')
nf.fit(df=train)
consumption_predict = nf.predict(futr_df=futr_df).reset_index()
consumption_predict.head()
unique_id ds RNN GRU LSTM TCN DilatedRNN MLP NHITS NBEATS NBEATSx
0 Consumption 2023-10-23 0.187143 0.340722 0.270740 0.227037 0.262816 0.142264 0.266232 0.310167 0.164331
1 Consumption 2023-10-24 0.120339 0.247064 0.097423 0.069222 0.166623 0.137484 0.146339 0.264303 0.187077
2 Consumption 2023-10-25 0.106269 0.185992 0.218475 0.100015 0.174694 0.203210 0.233841 0.362458 0.266043
3 Consumption 2023-10-26 0.037814 0.153535 0.145480 0.042539 0.125000 0.265789 0.335360 0.289754 0.315103
4 Consumption 2023-10-27 0.021276 0.049343 -0.004718 0.034827 0.095918 0.238553 0.303091 0.309346 0.279027
consumption_predict.drop('unique_id', axis=1, inplace=True)

just_consumption = df[df['unique_id'] == 'Consumption']
consumption_predict_plot = long_form(consumption_predict)
consumption_predict_plot = pd.concat([consumption_predict_plot, just_consumption]
                                     , ignore_index=True)
consumption_predict_plot
ds unique_id y
0 2023-10-23 RNN 0.187143
1 2023-10-24 RNN 0.120339
2 2023-10-25 RNN 0.106269
3 2023-10-26 RNN 0.037814
4 2023-10-27 RNN 0.021276
... ... ... ...
330 2023-10-28 Consumption 0.295413
331 2023-10-29 Consumption 0.096052
332 2023-10-30 Consumption 0.190600
333 2023-10-31 Consumption 0.244137
334 2023-11-01 Consumption 0.131755

335 rows × 3 columns

altair_plot(consumption_predict_plot)

Performance

from datasetsforecast.losses import (mae, mape, mase, rmse, smape)
consumption_predict.info(), consumption_predict.columns
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   ds          10 non-null     datetime64[ns]
 1   RNN         10 non-null     float64       
 2   GRU         10 non-null     float64       
 3   LSTM        10 non-null     float64       
 4   TCN         10 non-null     float64       
 5   DilatedRNN  10 non-null     float64       
 6   MLP         10 non-null     float64       
 7   NHITS       10 non-null     float64       
 8   NBEATS      10 non-null     float64       
 9   NBEATSx     10 non-null     float64       
dtypes: datetime64[ns](1), float64(9)
memory usage: 932.0 bytes
(None,
 Index(['ds', 'RNN', 'GRU', 'LSTM', 'TCN', 'DilatedRNN', 'MLP', 'NHITS',
        'NBEATS', 'NBEATSx'],
       dtype='object'))
def evaluate_performace(y_hist, y_true, y_pred):
    """Score every model column of *y_pred* against the actuals.

    Parameters
    ----------
    y_hist : frame with the training actuals in column 'y' (needed by mase).
    y_true : frame with the test-window actuals in column 'y'.
    y_pred : frame whose first column is 'ds' followed by one prediction
        column per model; the 'ds' column is skipped.

    Returns
    -------
    pandas.DataFrame with one column per model and one row per metric
    (mase, mae, mape, rmse, smape).
    """
    print(y_pred.columns)
    actual = y_true['y'].values
    history = y_hist['y'].values
    evaluation = {}
    for model in y_pred.columns[1:]:  # skip the leading 'ds' column
        predicted = y_pred[model].values
        scores = {}
        for metric in (mase, mae, mape, rmse, smape):
            name = metric.__name__
            if name == 'mase':
                # mase additionally needs the in-sample history and a
                # seasonality (monthly here).
                scores[name] = metric(actual, predicted, history, seasonality=12)
            else:
                scores[name] = metric(actual, predicted)
        evaluation[model] = scores
    return pd.DataFrame(evaluation)
score = evaluate_performace(train, test, consumption_predict)
score.style.background_gradient(cmap ='YlGn', axis = 1,low=0.5, high=0.6)
Index(['ds', 'RNN', 'GRU', 'LSTM', 'TCN', 'DilatedRNN', 'MLP', 'NHITS',
       'NBEATS', 'NBEATSx'],
      dtype='object')
  RNN GRU LSTM TCN DilatedRNN MLP NHITS NBEATS NBEATSx
mase 0.827192 0.475914 0.581997 0.854392 0.532540 0.891489 1.069189 1.064738 1.062052
mae 0.124269 0.071497 0.087433 0.128355 0.080004 0.133928 0.160624 0.159956 0.159552
mape 55.463221 31.560946 31.207467 48.447932 29.925153 68.742079 92.780892 92.143272 92.818673
rmse 0.164423 0.107169 0.156157 0.176625 0.128558 0.170495 0.195339 0.177750 0.192040
smape 76.016902 34.840016 47.025939 71.174894 34.164025 55.432298 60.084229 58.849896 60.377130
score = data_scaler(score.T)
score.plot(kind='bar', legend=True)
<Axes: >

results = evaluate_performace(train, test, consumption_predict)
results.to_csv('Data/Neutral_results5.csv')
Index(['ds', 'RNN', 'GRU', 'LSTM', 'TCN', 'DilatedRNN', 'MLP', 'NHITS',
       'NBEATS', 'NBEATSx'],
      dtype='object')

Cross_Validate

cv_df = nf.cross_validation(train,
                            n_windows=3,
                            step_size= 1,
                           )
cv_df.columns
Index(['unique_id', 'ds', 'cutoff', 'RNN', 'GRU', 'LSTM', 'TCN', 'DilatedRNN',
       'MLP', 'NHITS', 'NBEATS', 'NBEATSx', 'Aircon', 'y', 'ConsumptionNet',
       'Export', 'Fridge', 'Garage', 'Hotwater', 'HouseBoardOutside',
       'HouseBoardOutsideR', 'HouseBoardR', 'Import', 'Lights', 'Net',
       'OvenStove', 'Powerpoints', 'Production', 'temp_c', 'is_day',
       'wind_kph', 'wind_degree', 'pressure_mb', 'precip_mm', 'humidity',
       'cloud', 'feelslike_c', 'windchill_c', 'heatindex_c', 'dewpoint_c',
       'chance_of_rain', 'vis_km', 'gust_kph', 'uv'],
      dtype='object')
cv_df.drop('cutoff', axis=1, inplace=True)
cv_df.drop('unique_id', axis=1, inplace=True)
cv_df['RNN']
0     0.135751
1     0.069040
2     0.035342
3     0.191986
4     0.166608
5     0.172630
6     0.276067
7     0.224828
8     0.119483
9     0.142353
10    0.102038
11    0.065945
12    0.183238
13    0.203040
14    0.190365
15    0.209685
16    0.239724
17    0.131889
18    0.137529
19    0.309497
20    0.131816
21    0.200610
22    0.181522
23    0.179420
24    0.192814
25    0.191258
26    0.125263
27    0.109986
28    0.267251
29    0.237799
Name: RNN, dtype: float32
consumption_predict.columns
Index(['ds', 'RNN', 'GRU', 'LSTM', 'TCN', 'DilatedRNN', 'MLP', 'NHITS',
       'NBEATS', 'NBEATSx'],
      dtype='object')
cv_df_filter = cv_df[consumption_predict.columns]
cv_df_filter
ds RNN GRU LSTM TCN DilatedRNN MLP NHITS NBEATS NBEATSx
0 2023-10-11 0.135751 0.067367 0.110072 0.087748 0.100938 0.102914 0.160495 0.176175 0.128718
1 2023-10-12 0.069040 0.149712 0.197463 0.148317 0.126475 0.112232 0.078474 0.145379 0.088011
2 2023-10-13 0.035342 0.187814 0.194257 0.140832 0.177151 0.041507 0.077609 0.112854 0.052702
3 2023-10-14 0.191986 0.232270 0.253045 0.228376 0.199984 0.159770 0.178985 0.242857 0.067291
4 2023-10-15 0.166608 0.128854 0.169904 0.126402 0.164642 0.215704 0.143691 0.212753 0.167671
5 2023-10-16 0.172630 0.208335 0.227233 0.223335 0.202316 0.139277 0.256585 0.268092 0.178975
6 2023-10-17 0.276067 0.281430 0.284902 0.354908 0.246623 0.305722 0.252022 0.270245 0.243796
7 2023-10-18 0.224828 0.219993 0.226789 0.329371 0.222550 0.262789 0.249297 0.274197 0.279998
8 2023-10-19 0.119483 0.204906 0.162221 0.165711 0.210179 0.112409 0.100446 0.252383 0.193922
9 2023-10-20 0.142353 0.226974 0.191427 0.111255 0.154943 0.302589 0.209240 0.218997 0.152117
10 2023-10-12 0.102038 0.111689 0.155506 0.097696 0.139405 0.065505 0.063259 0.111341 0.082719
11 2023-10-13 0.065945 0.173949 0.210845 0.121179 0.138233 0.151942 0.068114 0.124901 0.112885
12 2023-10-14 0.183238 0.209708 0.224227 0.221133 0.225971 0.141812 0.198544 0.221697 0.127999
13 2023-10-15 0.203040 0.189177 0.187111 0.244232 0.242181 0.160846 0.196839 0.221572 0.114414
14 2023-10-16 0.190365 0.249911 0.216426 0.253370 0.215205 0.200827 0.154640 0.255481 0.214551
15 2023-10-17 0.209685 0.212194 0.246824 0.305890 0.202765 0.186043 0.265844 0.213276 0.161825
16 2023-10-18 0.239724 0.239608 0.244121 0.271035 0.233068 0.306250 0.258346 0.316249 0.219644
17 2023-10-19 0.131889 0.198312 0.168622 0.159480 0.207734 0.131938 0.148374 0.250731 0.293880
18 2023-10-20 0.137529 0.205980 0.201126 0.135570 0.194409 0.162717 0.189692 0.192537 0.217399
19 2023-10-21 0.309497 0.257568 0.222876 0.194708 0.225145 0.306709 0.249137 0.223473 0.185947
20 2023-10-13 0.131816 0.154035 0.225009 0.131973 0.137236 0.070659 0.026579 0.064938 -0.003257
21 2023-10-14 0.200610 0.267801 0.222852 0.250773 0.230686 0.252358 0.116113 0.220819 0.064967
22 2023-10-15 0.181522 0.219067 0.192574 0.274661 0.226209 0.140444 0.076887 0.129147 0.088821
23 2023-10-16 0.179420 0.209414 0.232633 0.256211 0.203172 0.187722 0.257174 0.165209 0.136125
24 2023-10-17 0.192814 0.198803 0.229261 0.268127 0.147232 0.298106 0.138862 0.254128 0.190610
25 2023-10-18 0.191258 0.202635 0.200670 0.332800 0.234716 0.180917 0.157915 0.141489 0.126197
26 2023-10-19 0.125263 0.125340 0.174479 0.155649 0.202877 0.137405 0.134489 0.214591 0.166119
27 2023-10-20 0.109986 0.172756 0.117033 0.113169 0.137168 0.212421 0.174950 0.158424 0.247471
28 2023-10-21 0.267251 0.264075 0.266374 0.239609 0.218864 0.225566 0.161354 0.198722 0.185505
29 2023-10-22 0.237799 0.199004 0.225758 0.263953 0.253231 0.243639 0.234858 0.233367 0.161174
cv_df_plot = long_form(cv_df_filter)
cv_df_plot
ds unique_id y
0 2023-10-11 RNN 0.135751
1 2023-10-12 RNN 0.069040
2 2023-10-13 RNN 0.035342
3 2023-10-14 RNN 0.191986
4 2023-10-15 RNN 0.166608
... ... ... ...
265 2023-10-18 NBEATSx 0.126197
266 2023-10-19 NBEATSx 0.166119
267 2023-10-20 NBEATSx 0.247471
268 2023-10-21 NBEATSx 0.185505
269 2023-10-22 NBEATSx 0.161174

270 rows × 3 columns

altair_plot(cv_df_plot)
Back to top